In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
from warnings import filterwarnings 
filterwarnings("ignore")

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
D:\anaconda files\lib\site-packages\scipy\__init__.py:155: UserWarning: A NumPy version >=1.18.5 and <1.25.0 is required for this version of SciPy (detected version 1.26.4
  warnings.warn(f"A NumPy version >={np_minversion} and <{np_maxversion}"
In [2]:
data = pd.read_csv("C:\\Users\\laxma\\Downloads\\CellPhone_train.csv")
data
Out[2]:
battery_power blue clock_speed dual_sim fc four_g int_memory m_dep mobile_wt n_cores ... px_height px_width ram sc_h sc_w talk_time three_g touch_screen wifi price_range
0 842 0 2.2 0 1 0 7 0.6 188 2 ... 20 756 2549 9 7 19 0 0 1 1
1 1021 1 0.5 1 0 1 53 0.7 136 3 ... 905 1988 2631 17 3 7 1 1 0 2
2 563 1 0.5 1 2 1 41 0.9 145 5 ... 1263 1716 2603 11 2 9 1 1 0 2
3 615 1 2.5 0 0 0 10 0.8 131 6 ... 1216 1786 2769 16 8 11 1 0 0 2
4 1821 1 1.2 0 13 1 44 0.6 141 2 ... 1208 1212 1411 8 2 15 1 1 0 1
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1995 794 1 0.5 1 0 1 2 0.8 106 6 ... 1222 1890 668 13 4 19 1 1 0 0
1996 1965 1 2.6 1 0 0 39 0.2 187 4 ... 915 1965 2032 11 10 16 1 1 1 2
1997 1911 0 0.9 1 1 1 36 0.7 108 8 ... 868 1632 3057 9 1 5 1 1 0 3
1998 1512 0 0.9 0 4 1 46 0.1 145 5 ... 336 670 869 18 10 19 1 1 1 0
1999 510 1 2.0 1 5 1 45 0.9 168 6 ... 483 754 3919 19 4 2 1 1 1 3

2000 rows × 21 columns

In [3]:
data.head()
Out[3]:
battery_power blue clock_speed dual_sim fc four_g int_memory m_dep mobile_wt n_cores ... px_height px_width ram sc_h sc_w talk_time three_g touch_screen wifi price_range
0 842 0 2.2 0 1 0 7 0.6 188 2 ... 20 756 2549 9 7 19 0 0 1 1
1 1021 1 0.5 1 0 1 53 0.7 136 3 ... 905 1988 2631 17 3 7 1 1 0 2
2 563 1 0.5 1 2 1 41 0.9 145 5 ... 1263 1716 2603 11 2 9 1 1 0 2
3 615 1 2.5 0 0 0 10 0.8 131 6 ... 1216 1786 2769 16 8 11 1 0 0 2
4 1821 1 1.2 0 13 1 44 0.6 141 2 ... 1208 1212 1411 8 2 15 1 1 0 1

5 rows × 21 columns

In [4]:
data.tail()
Out[4]:
battery_power blue clock_speed dual_sim fc four_g int_memory m_dep mobile_wt n_cores ... px_height px_width ram sc_h sc_w talk_time three_g touch_screen wifi price_range
1995 794 1 0.5 1 0 1 2 0.8 106 6 ... 1222 1890 668 13 4 19 1 1 0 0
1996 1965 1 2.6 1 0 0 39 0.2 187 4 ... 915 1965 2032 11 10 16 1 1 1 2
1997 1911 0 0.9 1 1 1 36 0.7 108 8 ... 868 1632 3057 9 1 5 1 1 0 3
1998 1512 0 0.9 0 4 1 46 0.1 145 5 ... 336 670 869 18 10 19 1 1 1 0
1999 510 1 2.0 1 5 1 45 0.9 168 6 ... 483 754 3919 19 4 2 1 1 1 3

5 rows × 21 columns

In [5]:
data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2000 entries, 0 to 1999
Data columns (total 21 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   battery_power  2000 non-null   int64  
 1   blue           2000 non-null   int64  
 2   clock_speed    2000 non-null   float64
 3   dual_sim       2000 non-null   int64  
 4   fc             2000 non-null   int64  
 5   four_g         2000 non-null   int64  
 6   int_memory     2000 non-null   int64  
 7   m_dep          2000 non-null   float64
 8   mobile_wt      2000 non-null   int64  
 9   n_cores        2000 non-null   int64  
 10  pc             2000 non-null   int64  
 11  px_height      2000 non-null   int64  
 12  px_width       2000 non-null   int64  
 13  ram            2000 non-null   int64  
 14  sc_h           2000 non-null   int64  
 15  sc_w           2000 non-null   int64  
 16  talk_time      2000 non-null   int64  
 17  three_g        2000 non-null   int64  
 18  touch_screen   2000 non-null   int64  
 19  wifi           2000 non-null   int64  
 20  price_range    2000 non-null   int64  
dtypes: float64(2), int64(19)
memory usage: 328.2 KB
In [6]:
data.isnull().sum()
Out[6]:
battery_power    0
blue             0
clock_speed      0
dual_sim         0
fc               0
four_g           0
int_memory       0
m_dep            0
mobile_wt        0
n_cores          0
pc               0
px_height        0
px_width         0
ram              0
sc_h             0
sc_w             0
talk_time        0
three_g          0
touch_screen     0
wifi             0
price_range      0
dtype: int64
In [7]:
data.duplicated().sum()
Out[7]:
0
In [8]:
data.int_memory.sum()
Out[8]:
64093
In [9]:
data.columns
Out[9]:
Index(['battery_power', 'blue', 'clock_speed', 'dual_sim', 'fc', 'four_g',
       'int_memory', 'm_dep', 'mobile_wt', 'n_cores', 'pc', 'px_height',
       'px_width', 'ram', 'sc_h', 'sc_w', 'talk_time', 'three_g',
       'touch_screen', 'wifi', 'price_range'],
      dtype='object')
In [10]:
#VISUALIZATION
In [11]:
plt.scatter(data['battery_power'],data['mobile_wt'],color='red')
plt.xticks(rotation=90)
plt.show()
In [12]:
plt.bar(data['pc'],data['px_height'])
plt.xticks(rotation=90)
plt.show()
In [13]:
fig=px.bar(data,x='int_memory',y='ram',color='int_memory')
fig.show()
In [14]:
fig=px.violin(data,x='clock_speed',y='wifi',color='clock_speed')
fig.show()
In [15]:
plt.figure(figsize=(10,4))
sns.countplot(x='talk_time', data=data, color='cyan')
plt.xticks(rotation=90)
plt.show()
In [16]:
sns.barplot(data['sc_h'],data['sc_w'],color='r')
plt.xticks(rotation=90)
plt.show()
In [17]:
sns.lineplot(x='mobile_wt', y='four_g', data=data)
Out[17]:
<AxesSubplot:xlabel='mobile_wt', ylabel='four_g'>
In [18]:
plt.figure(figsize=(8, 4))
sns.scatterplot(data=data, x='pc', y='m_dep')
plt.xlabel('pc')
plt.ylabel('m_dep')
plt.show()
In [19]:
sns.displot(data["n_cores"])
Out[19]:
<seaborn.axisgrid.FacetGrid at 0x21786a1d670>
In [20]:
sns.countplot(x='dual_sim',data=data)
plt.xticks(rotation=90)
Out[20]:
(array([0, 1]), [Text(0, 0, '0'), Text(1, 0, '1')])
In [21]:
sns.boxplot(x='blue',y='fc',data=data)
Out[21]:
<AxesSubplot:xlabel='blue', ylabel='fc'>
In [22]:
sns.violinplot(x='price_range',y='battery_power',data=data)
Out[22]:
<AxesSubplot:xlabel='price_range', ylabel='battery_power'>
In [23]:
#MODEL BUILDING
In [24]:
X = data.iloc[:,0:9]
y = data.iloc[:,9]

Xtr, Xte, ytr, yte = train_test_split(X, y, test_size=0.2, random_state=42)
In [25]:
sc = StandardScaler()

Xtr = sc.fit_transform(Xtr)
Xte = sc.fit_transform(Xte)
In [26]:
clf=KNeighborsClassifier(n_neighbors=11, p=2, metric='euclidean')
In [27]:
clf.fit(Xtr, ytr)
pred = clf.predict(Xte) 
In [28]:
print(confusion_matrix(pred, yte))
[[15 12 10  7  8  9 12 10]
 [ 8  9  6 14  9  8  4 11]
 [ 5  7  3  8 10 12  7  5]
 [ 7 10  3 10  3  5  7  6]
 [ 4  8  2  7  6  5  5  4]
 [ 3  3  5  2  6  3  2  3]
 [ 4  2  7  4  7  4  3  6]
 [ 4  6  7  2  4  5  3  4]]
In [29]:
print(accuracy_score(pred, yte))
0.1325
In [ ]: